# Which column types does the raw data contain?
sapply(X = crimes.df.raw, FUN = class)
DR_NO Date Rptd DATE OCC TIME OCC AREA
"numeric" "character" "character" "character" "character"
AREA NAME Rpt Dist No Part 1-2 Crm Cd Crm Cd Desc
"character" "character" "numeric" "numeric" "character"
Mocodes Vict Age Vict Sex Vict Descent Premis Cd
"character" "numeric" "character" "character" "numeric"
Premis Desc Weapon Used Cd Weapon Desc Status Status Desc
"character" "numeric" "character" "character" "character"
Crm Cd 1 Crm Cd 2 Crm Cd 3 Crm Cd 4 LOCATION
"numeric" "numeric" "numeric" "logical" "character"
Cross Street LAT LON
"character" "numeric" "numeric"
Modus Operandi –> richtige Nummern zum Zuordnen mit der Tabelle der MO-Codes
Datum Format
Es existieren NAs in manchen Spalten
Nummern als String –> Nummern
In manchen Spalten befinden sich NAs, dazu gehören:
Weapon
Weapon Desc
Crime Codes –> nicht alle Taten
Überprüfung Aussagen der Metadaten:
Crm Cd should be the same as Crm Cd 1
Crm Cd 1 sollte die gleichen Werte wie Crm Cd haben:
Crm Cd
Indicates the crime committed. (Same as Crime Code 1)
Es gibt aber 1956 unterschiedliche Werte –> todo analyse später
Part 1-2 weglöschen?!
Area = Area Name ?!
Premise Cd = Premise Desc ?!
Weapon Use Cd = Weapon Desc ?!
3. Transformation & Bearbeitung
Aufgabenstellung (15 Pkt.)
Umcodierung von Daten, z.B. numerisch in kategorial
Subsetting der Daten
Joining von Datentabellen - falls nötig. Welcher Join ist notwendig? Warum?
Übersicht der transformierten Daten. Sie können hierzu Hilfsmittel wie glimpse(), skim() und head() benutzen, um Ihre Erläuterungen zu veranschaulichen.
Sind die sich ergebenden Daten so, wie Sie es erwartet haben? Warum oder warum nicht?
Aufbereitung / Umcodierung
# Prepare the code list used to map the modus operandi (MO) codes.
#
# Splits one string of space-separated codes into a numeric vector.
# A missing input is passed through as NA.
# NOTE(review): assumes `x` is a single string (one `Mocodes` cell) - confirm.
codes_to_numeric <- function(x) {
  if (!is.na(x)) {
    as.numeric(strsplit(x, " ")[[1]])
  } else {
    NA
  }
}

# Convert the raw columns to more meaningful data types:
# - both date columns: the first 10 characters are parsed as month/day/year,
# - time, area, reporting district and crime code become integers,
# - `Mocodes` becomes a list column holding one numeric vector per row.
crimes.df <- transform(
  crimes.df.raw,
  `Date Rptd` = as.Date(substr(`Date Rptd`, 1, 10), format = "%m/%d/%Y"),
  `DATE OCC` = as.Date(substr(`DATE OCC`, 1, 10), format = "%m/%d/%Y"),
  `TIME OCC` = as.integer(`TIME OCC`),
  `AREA` = as.integer(`AREA`),
  `Rpt Dist No` = as.integer(`Rpt Dist No`),
  `Crm Cd` = as.integer(`Crm Cd`),
  `Mocodes` = lapply(Mocodes, codes_to_numeric)
)
# Row positions where `Crm Cd` and `Crm Cd 1` disagree. Comparisons that
# evaluate to NA are not reported by `which()`.
crmCd.diff <- which(crimes.df[["Crm Cd"]] != crimes.df[["Crm Cd 1"]])

# Number of rows with differing codes.
length(crmCd.diff)
[1] 1956
# Drop `Crm Cd 4` when the column holds no non-missing value at all.
if (sum(!is.na(crimes.df[["Crm Cd 4"]])) == 0L) {
  crimes.df[["Crm Cd 4"]] <- NULL
}

# Drop `Part 1-2`.
# TODO: document the reason for removing this column.
crimes.df[["Part 1-2"]] <- NULL
In der Klassifizierung der Straftaten sind nicht alle Straftaten enthalten
# Crime codes that occur in `crimes.df` but have no matching `Code` in
# `crmcd.categories`, reduced to one row per distinct code/description pair.
missing_codes <- crimes.df %>%
  anti_join(y = crmcd.categories, by = c("Crm Cd" = "Code")) %>%
  select(`Crm Cd`, `Crm Cd Desc`) %>%
  distinct()
# Manually maintained category assignments for crime codes, one row per code.
# Columns: `Category` (category label), `Code` (numeric crime code),
# `Description` (crime description text).
# Descriptions are kept verbatim, including their original spelling and
# truncation, so they are not normalised here.
# Fix: the category for code 806 previously contained a line break inside the
# string literal ("SEX \nOFFENSES") and therefore did not equal "SEX OFFENSES".
missing_categories <- tibble::tribble(
  ~Category, ~Code, ~Description,
  "PERSONAL THEFT", 354, "THEFT OF IDENTITY",
  "SEX OFFENSES", 812, "CRM AGNST CHLD (13 OR UNDER) (14-15 & SUSP 10 YRS OLDER)",
  "SEX OFFENSES", 956, "LETTERS, LEWD - TELEPHONE CALLS, LEWD",
  "OTHER THEFT", 668, "EMBEZZLEMENT, GRAND THEFT ($950.01 & OVER)",
  "SEX OFFENSES", 813, "CHILD ANNOYING (17YRS & UNDER)",
  "SEX OFFENSES", 762, "LEWD CONDUCT",
  "FRAUD", 662, "BUNCO, GRAND THEFT",
  "SEX OFFENSES", 860, "BATTERY WITH SEXUAL CONTACT",
  "CYBER CRIME", 661, "UNAUTHORIZED COMPUTER ACCESS",
  "SEX OFFENSES", 810, "SEX,UNLAWFUL(INC MUTUAL CONSENT, PENETRATION W/ FRGN OBJ",
  "ORDER VIOLATIONS", 901, "VIOLATION OF RESTRAINING ORDER",
  "VANDALISM", 740, "VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)",
  "OTHER", 946, "OTHER MISCELLANEOUS CRIME",
  "FORGERY/FRAUD", 649, "DOCUMENT FORGERY / STOLEN FELONY",
  "SEX OFFENSES", 845, "SEX OFFENDER REGISTRANT OUT OF COMPLIANCE",
  "VANDALISM", 745, "VANDALISM - MISDEAMEANOR ($399 OR UNDER)",
  "FORGERY/FRAUD", 653, "CREDIT CARDS, FRAUD USE ($950.01 & OVER)",
  "OTHER", 940, "EXTORTION",
  "ARSON", 648, "ARSON",
  "DISORDERLY CONDUCT", 886, "DISTURBING THE PEACE",
  "FRAUD", 666, "BUNCO, ATTEMPT",
  "HUMAN TRAFFICKING", 921, "HUMAN TRAFFICKING - INVOLUNTARY SERVITUDE",
  "SEX OFFENSES", 805, "PIMPING",
  "SEX OFFENSES", 932, "PEEPING TOM",
  "ORDER VIOLATIONS", 900, "VIOLATION OF COURT ORDER",
  "ORDER VIOLATIONS", 903, "CONTEMPT OF COURT",
  "FALSE REPORT", 439, "FALSE POLICE REPORT",
  "OTHER", 954, "CONTRIBUTING",
  "OTHER", 434, "FALSE IMPRISONMENT",
  "FORGERY/FRAUD", 654, "CREDIT CARDS, FRAUD USE ($950 & UNDER",
  "KIDNAPPING", 922, "CHILD STEALING",
  "SEX OFFENSES", 760, "LEWD/LASCIVIOUS ACTS WITH CHILD",
  "OTHER THEFT", 670, "EMBEZZLEMENT, PETTY THEFT ($950 & UNDER)",
  "SEX OFFENSES", 850, "INDECENT EXPOSURE",
  "CHILD NEGLECT", 237, "CHILD NEGLECT (SEE 300 W.I.C.)",
  "TRESPASSING", 888, "TRESPASSING",
  "WEAPONS", 753, "DISCHARGE FIREARMS/SHOTS FIRED",
  "HUMAN TRAFFICKING", 822, "HUMAN TRAFFICKING - COMMERCIAL SEX ACTS",
  "SEX OFFENSES", 806, "PANDERING",
  "WEAPONS", 906, "FIREARMS RESTRAINING ORDER (FIREARMS RO)",
  "MVT (GTA)", 522, "VEHICLE, STOLEN - OTHER (MOTORIZED SCOOTERS, BIKES, ETC)",
  "TRAFFIC", 890, "FAILURE TO YIELD",
  "OTHER", 755, "BOMB SCARE",
  "FRAUD", 664, "BUNCO, PETTY THEFT",
  "THEFT", 951, "DEFRAUDING INNKEEPER/THEFT OF SERVICES, $950 & UNDER",
  "KIDNAPPING", 920, "KIDNAPPING - GRAND ATTEMPT",
  "ORDER VIOLATIONS", 902, "VIOLATION OF TEMPORARY RESTRAINING ORDER",
  "FORGERY/FRAUD", 651, "DOCUMENT WORTHLESS ($200.01 & OVER)",
  "KIDNAPPING", 910, "KIDNAPPING",
  "SEX OFFENSES", 814, "CHILD PORNOGRAPHY",
  "WEAPONS", 756, "WEAPONS POSSESSION/BOMBING",
  "WEAPONS", 931, "REPLICA FIREARMS(SALE,DISPLAY,MANUFACTURE OR DISTRIBUTE)",
  "TRAFFIC", 438, "RECKLESS DRIVING",
  "FORGERY/FRAUD", 660, "COUNTERFEIT",
  "THEFT", 950, "DEFRAUDING INNKEEPER/THEFT OF SERVICES, OVER $950.01",
  "OTHER", 943, "CRUELTY TO ANIMALS",
  "OTHER", 949, "ILLEGAL DUMPING",
  "OTHER", 933, "PROWLER",
  "DRUG OFFENSES", 865, "DRUGS, TO A MINOR",
  "FORGERY/FRAUD", 652, "DOCUMENT WORTHLESS ($200 & UNDER)",
  "OTHER THEFT", 446, "PETTY THEFT - AUTO REPAIR",
  "OTHER", 944, "CONSPIRACY",
  "OTHER THEFT", 349, "GRAND THEFT / AUTO REPAIR",
  "OTHER", 942, "BRIBERY",
  "OTHER THEFT", 347, "GRAND THEFT / INSURANCE FRAUD",
  "CHILD NEGLECT", 870, "CHILD ABANDONMENT",
  "OTHER", 880, "DISRUPT SCHOOL",
  "OTHER", 924, "TELEPHONE PROPERTY - DAMAGE",
  "SEX OFFENSES", 840, "BEASTIALITY, CRIME AGAINST NATURE SEXUAL ASSLT WITH ANIM",
  "OTHER", 948, "BIGAMY",
  "DISORDERLY CONDUCT", 884, "FAILURE TO DISPERSE",
  "WEAPONS", 904, "FIREARMS EMERGENCY PROTECTIVE ORDER (FIREARMS EPO)",
  "SEX OFFENSES", 830, "INCEST (SEXUAL ACTS BETWEEN BLOOD RELATIVES)",
  "OTHER", 432, "BLOCKING DOOR INDUCTION CENTER",
  "DISORDERLY CONDUCT", 882, "INCITING A RIOT"
)
# To inspect every row:
# print(missing_categories, n = nrow(missing_categories))
DR_NO codes_str
1 190326475 NA
2 200106753 1822, 1402, 344
3 200320258 344, 1251
4 200907217 325, 1501
5 220614831 1822, 1501, 930, 2004
6 231808869 1822, 100, 930, 929
meanings_str
1 NA
2 Stranger, Evidence Booked (any crime), Removes vict property
3 Removes vict property, Victim was a student
4 Took merchandise, Other MO (see rpt)
5 Stranger, Other MO (see rpt), Unauthorized use of victim's credit/debit card or number, Suspect is homeless/transient
6 Stranger, Suspect Impersonate, Unauthorized use of victim's credit/debit card or number, Unauthorized use of victim's bank account information
Subsetting
Area
Geschlecht
Abstammung
Art des Crimes
Übersicht des Dataframes
Fazit - Transformation & Bearbeitung
4. Geeignete Visualisierung und Aggregation der Daten
Aufgabenstellung (15 Pkt.)
Fassen Sie die Daten in einer geeigneten Form zur Beantwortung Ihrer formulierten Fragestellung zusammen. Ziehen Sie auch geeignete Visualisierungen der transformierten und/oder aggregierten Daten heran, um Ihre Aussagen entsprechend zu untermauern oder zu veranschaulichen.
Hier können Sie auch geeignete statistische Verfahren bzw. Modellierungen nutzen, falls diese Ihnen bezüglich Ihrer Fragestellung weiterhelfen.
Welche Klassen von Straftaten werden wie häufig begangen?
Anzahl proportional zum Schweregrad
Viele Diebstähle & andere Straftaten, um Geld zu beschaffen
# Load the mapping library.
library(leaflet)

# Interactive OpenStreetMap view of `crimes.df.no_id_theft`: one circle marker
# per row, placed at the row's LON/LAT.
leaflet(data = crimes.df.no_id_theft) %>%
  # Default OSM base tiles.
  addTiles() %>%
  # Initial view centred on Los Angeles.
  setView(lng = -118.2437, lat = 34.0522, zoom = 9) %>%
  addCircleMarkers(
    lng = ~LON,
    lat = ~LAT,
    radius = 5,
    color = "blue",
    stroke = FALSE,
    fillOpacity = 0.8,
    # The popup shows the `Crm Cd Desc` text behind the literal label "ID:".
    popup = ~paste("ID:", `Crm Cd Desc`)
  )
# Load the heatmap extension for leaflet.
library(leaflet.extras)

# Heatmap of all rows in `crimes.df`, located by their LON/LAT columns.
#
# No `intensity` is passed, so every row counts once. The previous
# `intensity = nrow(crimes.df)` was a single number recycled onto every point:
# each incident carried the total row count as its weight, which saturated the
# layer everywhere and left `max` and `blur` without visible effect.
leaflet(crimes.df) %>%
  # Base map tiles.
  addTiles() %>%
  # Initial view centred on Los Angeles.
  setView(lng = -118.2437, lat = 34.0522, zoom = 9) %>%
  addHeatmap(
    lng = ~LON,
    lat = ~LAT,
    blur = 20,    # blur factor
    max = 0.05,   # maximum intensity
    radius = 10   # radius of each heatmap point
  )